pacman::p_load(MatchIt,optmatch)
setwd(wd_data_final)
data<-data_student_raw

data<-data %>%
  filter(an_bac %in% 2008:2019) %>%
  filter(liceu_repartizat!='' ) %>%
  select(grad_perc,entrance_perc,dec_town,dec,n_hs_town_group,n_students_town_yr,n_hs_town, Unemployment_hs_bac,Wages_hs_bac,drop_hs_hs_bac,
         an,specializare_bac2,scoala_de_provenienta,media_la_admitere,
         class_mean,dec_town,n_hs_town_group,
         judet_bac,town,school_change,school_mean,quart_town,
         rezultat,judet_adm,Cod_SIIIR_hs_adm,unitate_de_invatamant,liceu_repartizat,Cod_SIRUTA2_hs_adm,
         specializare_adm,specializare_lb,an_bac) %>%
  mutate(grad_perc_zero=ifelse(is.na(grad_perc),0,grad_perc)) %>%
  group_by(town,an) %>%
  mutate(grad_perc_town=mean(grad_perc,na.rm=T),
         entrance_perc_town=mean(entrance_perc,na.rm=T),
         grad_perc_town_zero=mean(grad_perc_zero,na.rm=T)) %>%
  ungroup()
 

data<-as.data.frame(data)
data$n_hs_town_group<-data$n_hs_town
data$n_hs_town_group[data$n_hs_town>=4 & data$n_hs_town<=15]<-"4-15"
data$n_hs_town_group[data$n_hs_town>15]<-"16+"
data$n_hs_town_group<-with(data, reorder(n_hs_town_group, n_hs_town))

groups<-c("1","2","3","4-15","16+")
student_bins<-c("25","35","75","150")
scale<-c("5","3","2","1","1","1","1","1","1","1")
years<-sort(unique(data$an_bac))

#match ----
data_reg<-data.frame()

for (yr in years){
  print(eval(sprintf("Year: %.0f",yr)))
  
  data_temp_yr<-data %>%
    filter(an==yr) 
  
#for (i in 1) {
for (i in 1:(length(groups)-1)) {
    n<-groups[i]
    n_1<-groups[i+1]
    
  print(eval(sprintf("High Schools: %s",n)))
  
  for (decile in 1:10 ){
    ds<-c("1","2","3","4","5","6","7","8","9","10")
    d<-ds[decile]
    
    print(eval(sprintf("Decile: %s",d)))
    data_temp<-data_temp_yr %>%
      ungroup() %>%
      filter(dec_town==d) %>%
      filter(n_hs_town_group %in% c(n,n_1)) %>%
      mutate(treatment=ifelse(n_hs_town_group %in% (n_1),1,0)) %>%
      filter(!is.na(entrance_perc) & 
               !is.na(n_students_town_yr) & 
               !is.na(an))# &

    
    x<-matchit(treatment ~ entrance_perc+n_students_town_yr+entrance_perc_town, 
               data = data_temp,
               method = "nearest",
               distance = "mahalanobis",
               caliper=c(entrance_perc=0.01,n_students_town_yr=35*2^i*1.03^(-decile*i),entrance_perc_town=0.05),
               std.caliper=F,
               replace = T)
    
    data_temp<-match.data(x)
    data_reg<-bind_rows(data_reg,data_temp)
  }
}
}
summary(x)



setwd(wd_data_final)
saveRDS(data_reg,"data_matching_nearest_loose_anon")
data_reg<-readRDS("data_matching_nearest_loose_anon")

data_reg<- data_reg %>%
  mutate(group=case_when((treatment==0 & n_hs_town_group=="1") |(treatment==1 & n_hs_town_group=="2") ~ 1,
                         (treatment==0 & n_hs_town_group=="2") |(treatment==1 & n_hs_town_group=="3") ~ 2,
                         (treatment==0 & n_hs_town_group=="3") |(treatment==1 & n_hs_town_group=="4-15") ~ 3,
                         (treatment==0 & n_hs_town_group=="4-15") |(treatment==1 & n_hs_town_group=="16+") ~ 4)) %>%
  group_by(an,entrance_perc,group) %>%
  mutate(id=cur_group_id()) %>%
  ungroup() %>%
  mutate(grad_perc_zero=ifelse(is.na(grad_perc),0,grad_perc)) %>%
  group_by(town,an) %>%
  mutate(grad_perc_town=mean(grad_perc,na.rm=T),
         entrance_perc_town=mean(entrance_perc,na.rm=T),
         grad_perc_town_zero=mean(grad_perc_zero,na.rm=T)) %>%
  ungroup()
  
  

#full model ----
model_grad<-feols(I(grad_perc*100)~entrance_perc+treatment*dec_town+dec_town+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                      as.factor(an)+specializare_bac2,
                    cluster=~id,
                    weights=~weights,
                    data=data_reg)
summary(model_grad)

model_grad_1<-feols(I(grad_perc*100)~entrance_perc+treatment*dec_town+dec_town+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                    as.factor(an)+specializare_bac2,
                  cluster=~id,
                  weights=~weights,
                  data=data_reg %>% filter(group==1))
summary(model_grad_1)



model_school_mean_1<-feols(I(school_mean*100)~entrance_perc+treatment*dec_town+dec_town+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                          as.factor(an)+specializare_bac2+scoala_de_provenienta+town,
                        cluster=~id,
                        weights=~weights,
                        data=data_reg %>% 
                          filter(group==1 ) )
summary(model_school_mean_1)

model_school_mean<-feols(I(school_mean*100)~entrance_perc+treatment*dec_town+dec_town+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                           as.factor(an)+specializare_bac2+scoala_de_provenienta+town,
                         cluster=~id,
                         weights=~weights,
                         data=data_reg)
summary(model_school_mean)

model_grad_1_tide<-feols(I(grad_perc*100)~entrance_perc+treatment+dec_town+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                             as.factor(an)+specializare_bac2+scoala_de_provenienta+town,
                           cluster=~id,
                           weights=~weights,
                           data=data_reg %>% 
                             filter(group==1 ) )
summary(model_grad_1_tide)

model_grad_tide<-feols(I(grad_perc*100)~entrance_perc+treatment+dec_town+dec_town*n_students_town_yr+Unemployment_hs_bac*dec_town+Wages_hs_bac*dec_town+drop_hs_hs_bac*dec_town|
                           as.factor(an)+specializare_bac2+scoala_de_provenienta+town,
                         cluster=~id,
                         weights=~weights,
                         data=data_reg)
summary(model_grad_tide)


f<-function(x) formatC(x, digits = 2, big.mark = ",", format = "f")


options(modelsummary_format_numeric_latex = "plain")


current_path<-rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))


fileConn<-file("a_matching.txt")
writeLines(print(modelsummary(list("2 vs 1"=model_school_mean_1,
                                   "n vs n-1"=model_school_mean,
                                   "2 vs 1"=model_grad_1,
                                   "n vs n-1"=model_grad,
                                   "2 vs 1"=model_grad_1_tide,
                                   "n vs n-1"=model_grad_tide), 
                              statistic = "std.error",
                              estimate="{estimate}{stars}",
                              stars=c('^{*}'=0.1,'^{**}'=0.05,'^{***}'=0.01),
                              output="latex",
                              #gof_map=metrics,
                              gof_omit = 'AIC|R2 Pseudo|R2 Adj|R2 Within|BIC|Log.Lik.|Sigma',
                              coef_omit='none',
                              #coef_rename=variables,
                              fmt=2,
                              escape=F)
), fileConn)
close(fileConn)

r2(model_school_mean_1)
r2(model_school_mean)
r2(model_grad_1)
r2(model_grad)
r2(model_grad_1_tide)
r2(model_grad_tide)

c(r2(model_school_mean_1,type='ar2'),
  r2(model_school_mean,type='ar2'),
  r2(model_grad_1,type='ar2'),
  r2(model_grad,type='ar2'),
  r2(model_grad_1_tide,type='ar2'),
  r2(model_grad_tide,type='ar2'))

